home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Libris Britannia 4
/
science library(b).zip
/
science library(b)
/
PROGRAMM
/
PASCAL
/
0391B.ZIP
/
NEARMISS.ARC
/
NEARMISS.INC
next >
Wrap
Text File
|
1986-06-23
|
3KB
|
110 lines
{
This function uses a modified version of the soundex algorithm as implemented
by Glen F. Marshall. The changes to Marshall's algorithm are that the
end result is an integer rather than a string, and the algorithm simply
ignores non-alphabet characters.
To this, the NearMiss algorithm is added, which parses an input string
and uses spaces as delimeters to break the string up into substrings, which it
then sends to the soundex1 algorithm. The results are summed into the
resulting real value, and returned to the calling program. In this manner,
you can get a "close enough" match on very long strings, where minor spelling
errors between strings can be accounted for by subtracting one NearMiss
value from the other, and determining the size of the mis-match.
In this manner you can determine how close a near-miss can come to be
considered a match.
Written and placed in the public domain by
John Sims
1643 Calle Lindero
Lompoc, CA 93436
Use, modify, or do with it what you will.... enjoy!
}
Function NearMiss(input_string : anystr) : Real;
var
Beginning, Ending, Whoa : Integer;
Temp : Real;
SCode : anystr;
function soundex1(var name: anystr): integer;
var
work: array[0..3] of char;
code: char;
counter, i,j: integer;
function encode(var c: char): char;
var
r: char;
begin
case upcase(c) of
'B','F','P','V': r := '1';
'C','G','J','K','Q','S','X','Z': r := '2';
'D','T': r := '3';
'L': r := '4';
'M','N': r := '5';
'R': r := '6';
'A','E','I','O','U','Y': r := '7';
'H','W': r := '8';
else r := ' ';
end;
encode := r;
end; {encode}
begin
if length(name) > 0
then work[0] := encode(name[1])
else work[0] := ' ';
if work[0] <> ' '
then i := 2
else i := length(name) + 1;
j := 0; counter := 0;
while (i <= length(name)) and (j < 3) do
begin
code := encode(name[i]);
if code in ['1'..'6']
then if work[j] <> code
then begin
counter := counter + 1;
j := j+1;
work[j] := code;
end;
i := i + 1;
end;
for j := j+1 to 3 do work[j] := '0';
Val(work, counter, j);
Soundex1 := counter;
end; {soundex}
Procedure FindNextBlank;
begin
While (Input_String[Ending] <> ' ') and (Ending <= Whoa) do
Ending := Ending + 1;
End;
Procedure FindNextChar;
begin
While(Input_String[Beginning] = ' ') and (Beginning <= Whoa) do
Beginning := Beginning + 1;
If Beginning > Whoa then Beginning := Whoa
else Ending := Beginning;
End;
begin
Beginning := 1;
Ending := 1;
Whoa := Length(Input_String);
Temp := 0.0;
While Beginning <= Whoa do
begin
FindNextChar;
FindNextBlank;
SCode := Copy(Input_String, Beginning, (Ending - Beginning) + 1);
Temp := Temp + Soundex1(SCode);
Beginning := Ending + 1;
end;
NearMiss := Temp;
End;